problem 1a

data <- read.delim("~/Desktop/webtraffic.txt")
col_total <- colSums(data)
Traffic <- matrix(col_total, nrow = 9, ncol = 9, byrow = TRUE)
Traffic[9,1] = 1000
Traffic
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
##  [1,]    0  447  553    0    0    0    0    0    0
##  [2,]    0   23  230  321    0    0    0    0   63
##  [3,]    0  167   43  520    0    0    0    0   96
##  [4,]    0    0    0   44  158  312  247    0  124
##  [5,]    0    0    0    0   22   52   90  127  218
##  [6,]    0    0    0    0   67   21    0  294   97
##  [7,]    0    0    0    0    0   94    7  185   58
##  [8,]    0    0    0    0  262    0    0   30  344
##  [9,] 1000    0    0    0    0    0    0    0    0

problem 1b

Directed Graph

Directed Graph

The Markov Chain is irreducible because all states communicate with each other. The Markov Chain is ergodic beacuse it is recurrent and aperiodic.

problem 1c

row_total <- rowSums(Traffic)
P = Traffic / row_total
P
##       [,1]       [,2]       [,3]       [,4]      [,5]       [,6]       [,7]
##  [1,]    0 0.44700000 0.55300000 0.00000000 0.0000000 0.00000000 0.00000000
##  [2,]    0 0.03610675 0.36106750 0.50392465 0.0000000 0.00000000 0.00000000
##  [3,]    0 0.20217918 0.05205811 0.62953995 0.0000000 0.00000000 0.00000000
##  [4,]    0 0.00000000 0.00000000 0.04971751 0.1785311 0.35254237 0.27909605
##  [5,]    0 0.00000000 0.00000000 0.00000000 0.0432220 0.10216110 0.17681729
##  [6,]    0 0.00000000 0.00000000 0.00000000 0.1398747 0.04384134 0.00000000
##  [7,]    0 0.00000000 0.00000000 0.00000000 0.0000000 0.27325581 0.02034884
##  [8,]    0 0.00000000 0.00000000 0.00000000 0.4119497 0.00000000 0.00000000
##  [9,]    1 0.00000000 0.00000000 0.00000000 0.0000000 0.00000000 0.00000000
##             [,8]      [,9]
##  [1,] 0.00000000 0.0000000
##  [2,] 0.00000000 0.0989011
##  [3,] 0.00000000 0.1162228
##  [4,] 0.00000000 0.1401130
##  [5,] 0.24950884 0.4282908
##  [6,] 0.61377871 0.2025052
##  [7,] 0.53779070 0.1686047
##  [8,] 0.04716981 0.5408805
##  [9,] 0.00000000 0.0000000

problem 1d

a <- c(1, rep(0,8))
prob5 = a %*% P %*% P %*% P %*%P %*% P
prob5[5]
## [1] 0.1315178

The probability of a vistor being on Page 5 after 5 clicks is 0.1315178

problem 1e

Q <- t(P) - diag(9)
Q[9, ] <- rep(1,9)
rhs <- c(rep(0,8),1)
Pi <- solve (Q, rhs)
Pi
## [1] 0.15832806 0.10085497 0.13077897 0.14012033 0.08058898 0.07583914 0.05446485
## [8] 0.10069664 0.15832806
B <- P[1:8, 1:8]
Q <- diag(8) - B
rhs <- c(0.1,2,3,5,5,3,3,2)
m <- solve(Q,rhs)
m[1]
## [1] 14.563

The average time a vistor spends on the website is 14.563

Problem 2a

\[ n \geq \frac {\frac{1}{\lambda^2}} {{{(10^{-3}})^2}*0.01} \] \[ n \geq\frac{10^7}{\lambda^2} \]

Problem 2b

set.seed(400)
n1 <- 10000000/ (1^2)
n2 <- 10000000/ (2^2)
n3 <- 10000000/ (4^2)
x1 <- runif(n1,0,1)
x2 <- runif(n2,0,1)
x3 <- runif(n3,0,1)
y1 <- -log(x1)/1
y2 <- -log(x2)/2
y3 <- -log(x3)/4
i1 <- sum(sin(y1)) / n1
i2 <- sum(sin(y2)/2) / n2
i3 <- sum(sin(y3)/4) / n3
i1
## [1] 0.499993
i2
## [1] 0.199819
i3
## [1] 0.05892617

probelm 3a

Metropolis-Hastings

3b

q <- function(x, lambda){
  return(lambda * exp(-lambda * x))
}

f <- function(x, k=2, theta = 2){
  return(x^(k-1)* exp(-x/theta))
}

x_list  = c(1, rep(0,14999))

for(t in 0:14999){
  if (t == 0){
    x = 1 
  }else {
    x = -log(runif(1,0,1)) / x_list[t+1]
  }
  a = f(x) * q(x_list[t+1], x) / (f(x_list[t+1]) * q(x,x_list[t+1]))
  u = runif(1,0,1)   
  if (u <= a){
    x_list[t+2] = x
  }else
    x_list[t+2] = x_list[t+1]
}

hist(x_list[seq(0,15000,100)], freq = FALSE, breaks = 30, main = "Distribution of sampling", xlab = "Sample Value", col = "RED")

sample <- rgamma(n=100, shape=2, rate=2)
hist(sample, breaks = 20, main = "Gamma Distribution", xlab = "Value")

acf(x_list[seq(5000,15000,100)])

The sample generated in Problem 3b is random due to the acf, and the shape of the distribution is matched Gamma distribution